{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3da0c04f",
   "metadata": {},
   "source": [
    "# Data Loader for the Cell atlas of aqueous humor outflow"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "72d64f5b",
   "metadata": {},
   "source": [
    "Paper link: https://www.pnas.org/doi/10.1073/pnas.2001250117\n",
    "\n",
    "Use the direct data links below, or open the sub-series section of GEO accession GSE146188 at https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE146188\n",
    "\n",
    "Mouse\n",
    "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146186/suppl/GSE146186_Mouse_count_matrix.csv.gz \n",
    "\n",
    "Pig\n",
    "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146187/suppl/GSE146187_Pig_count_matrix.csv.gz\n",
    "\n",
    "Human\n",
    "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148371/suppl/GSE148371_Human_count_matrix.csv.gz\n",
    "\n",
    "MacaF\n",
    "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148373/suppl/GSE148373_MacaF_count_matrix.csv.gz\n",
    "\n",
    "MacaM\n",
    "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148374/suppl/GSE148374_MacaqueM_count_matrix.csv.gz\n",
    "\n",
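    "Download each file with `wget '<url>' -O data/<name>.csv.gz`. Do not add `-r`: recursive mode is unnecessary for a single file and makes wget print a warning when combined with `-O`."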
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3ad411d8",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: combining -O with -r or -p will mean that all downloaded content\n",
      "will be placed in the single file you specified.\n",
      "\n",
      "--2022-11-14 11:34:44--  https://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146186/suppl/GSE146186_Mouse_count_matrix.csv.gz\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.228, 165.112.9.230, 2607:f220:41e:250::7, ...\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.228|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 13462876 (13M) [application/x-gzip]\n",
      "Saving to: ‘data/mouse.csv.gz’\n",
      "\n",
      "data/mouse.csv.gz   100%[===================>]  12.84M  6.39MB/s    in 2.0s    \n",
      "\n",
      "2022-11-14 11:34:46 (6.39 MB/s) - ‘data/mouse.csv.gz’ saved [13462876/13462876]\n",
      "\n",
      "FINISHED --2022-11-14 11:34:46--\n",
      "Total wall clock time: 2.7s\n",
      "Downloaded: 1 files, 13M in 2.0s (6.39 MB/s)\n",
      "WARNING: combining -O with -r or -p will mean that all downloaded content\n",
      "will be placed in the single file you specified.\n",
      "\n",
      "--2022-11-14 11:34:47--  https://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146187/suppl/GSE146187_Pig_count_matrix.csv.gz\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.228, 165.112.9.230, 2607:f220:41e:250::7, ...\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.228|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 21384361 (20M) [application/x-gzip]\n",
      "Saving to: ‘data/pig.csv.gz’\n",
      "\n",
      "data/pig.csv.gz     100%[===================>]  20.39M  7.41MB/s    in 2.8s    \n",
      "\n",
      "2022-11-14 11:34:50 (7.41 MB/s) - ‘data/pig.csv.gz’ saved [21384361/21384361]\n",
      "\n",
      "FINISHED --2022-11-14 11:34:50--\n",
      "Total wall clock time: 3.2s\n",
      "Downloaded: 1 files, 20M in 2.8s (7.41 MB/s)\n",
      "WARNING: combining -O with -r or -p will mean that all downloaded content\n",
      "will be placed in the single file you specified.\n",
      "\n",
      "--2022-11-14 11:34:50--  https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148371/suppl/GSE148371_Human_count_matrix.csv.gz\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.228, 165.112.9.230, 2607:f220:41e:250::7, ...\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.228|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 52916704 (50M) [application/x-gzip]\n",
      "Saving to: ‘data/human.csv.gz’\n",
      "\n",
      "data/human.csv.gz   100%[===================>]  50.46M  11.1MB/s    in 7.1s    \n",
      "\n",
      "2022-11-14 11:34:58 (7.07 MB/s) - ‘data/human.csv.gz’ saved [52916704/52916704]\n",
      "\n",
      "FINISHED --2022-11-14 11:34:58--\n",
      "Total wall clock time: 7.8s\n",
      "Downloaded: 1 files, 50M in 7.1s (7.07 MB/s)\n",
      "WARNING: combining -O with -r or -p will mean that all downloaded content\n",
      "will be placed in the single file you specified.\n",
      "\n",
      "--2022-11-14 11:34:58--  https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148373/suppl/GSE148373_MacaF_count_matrix.csv.gz\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.228, 165.112.9.230, 2607:f220:41e:250::7, ...\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.228|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 19629094 (19M) [application/x-gzip]\n",
      "Saving to: ‘data/macaF.csv.gz’\n",
      "\n",
      "data/macaF.csv.gz   100%[===================>]  18.72M  8.67MB/s    in 2.2s    \n",
      "\n",
      "2022-11-14 11:35:00 (8.67 MB/s) - ‘data/macaF.csv.gz’ saved [19629094/19629094]\n",
      "\n",
      "FINISHED --2022-11-14 11:35:01--\n",
      "Total wall clock time: 2.7s\n",
      "Downloaded: 1 files, 19M in 2.2s (8.67 MB/s)\n",
      "WARNING: combining -O with -r or -p will mean that all downloaded content\n",
      "will be placed in the single file you specified.\n",
      "\n",
      "--2022-11-14 11:35:01--  https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148374/suppl/GSE148374_MacaqueM_count_matrix.csv.gz\n",
      "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.228, 165.112.9.230, 2607:f220:41e:250::7, ...\n",
      "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.228|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 10336871 (9.9M) [application/x-gzip]\n",
      "Saving to: ‘data/macaM.csv.gz’\n",
      "\n",
      "data/macaM.csv.gz   100%[===================>]   9.86M  6.26MB/s    in 1.6s    \n",
      "\n",
      "2022-11-14 11:35:03 (6.26 MB/s) - ‘data/macaM.csv.gz’ saved [10336871/10336871]\n",
      "\n",
      "FINISHED --2022-11-14 11:35:03--\n",
      "Total wall clock time: 2.0s\n",
      "Downloaded: 1 files, 9.9M in 1.6s (6.26 MB/s)\n"
     ]
    }
   ],
   "source": [
    "# Fetch one gzipped count matrix per dataset from GEO into ./data.\n",
    "# Plain single-file downloads: -r (recursive) is not needed and only triggers a warning when combined with -O.\n",
    "!wget 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146186/suppl/GSE146186_Mouse_count_matrix.csv.gz' -O data/mouse.csv.gz\n",
    "!wget 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146187/suppl/GSE146187_Pig_count_matrix.csv.gz' -O data/pig.csv.gz\n",
    "!wget 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148371/suppl/GSE148371_Human_count_matrix.csv.gz' -O data/human.csv.gz\n",
    "!wget 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148373/suppl/GSE148373_MacaF_count_matrix.csv.gz' -O data/macaF.csv.gz\n",
    "!wget 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE148nnn/GSE148374/suppl/GSE148374_MacaqueM_count_matrix.csv.gz' -O data/macaM.csv.gz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1406746b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decompress only the downloaded archives; a bare ./data/* glob would also hit the uncompressed metadata CSV kept in ./data.\n",
    "# -f overwrites CSVs left over from an earlier run so the notebook can be re-executed from the top.\n",
    "!gunzip -f ./data/*.csv.gz"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c97f3973",
   "metadata": {},
   "source": [
    "# Metadata file"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94825bf2",
   "metadata": {},
   "source": [
    "The metadata file was downloaded from the single cell portal at: https://singlecell.broadinstitute.org/single_cell/study/SCP780/cell-atlas-of-aqueous-humor-outflow-pathways-in-eyes-of-humans-and-four-model-species-provides-insights-into-glaucoma-pathogenesis#study-summary\n",
    "\n",
    "We have included it in the repo at `./data/all_five_species_metafile.csv`."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "69e33bf2",
   "metadata": {},
   "source": [
    "# Convert to Scanpy h5ads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "61ddf85b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scanpy as sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b93ca69a",
   "metadata": {},
   "outputs": [],
   "source": [
    "pig_path = \"data/pig.csv\"\n",
    "macaM_path = \"data/macaM.csv\"\n",
    "macaF_path = \"data/macaF.csv\"\n",
    "human_path = \"data/human.csv\"\n",
    "mouse_path = \"data/mouse.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "da00f2b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-cell annotation table (columns NAME and Cluster) shared by all five datasets.\n",
    "# The row labelled 0 is discarded right after loading.\n",
    "# NOTE(review): presumably that row is a non-cell type/descriptor line of the portal export - confirm.\n",
    "meta_path = \"data/all_five_species_metafile.csv\"\n",
    "meta = pd.read_csv(meta_path).drop(index=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "1dd56d06",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NAME</th>\n",
       "      <th>Cluster</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>H1TMS1_AAACCTGAGCGTTCCG-1</td>\n",
       "      <td>Macrophage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>H1TMS1_AAACCTGAGGTAGCTG-1</td>\n",
       "      <td>SchwannCell-nmy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>H1TMS1_AAACCTGAGTTGTAGA-1</td>\n",
       "      <td>Pericyte</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>H1TMS1_AAACCTGCAGCTGTAT-1</td>\n",
       "      <td>SchwannCell-nmy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>H1TMS1_AAACCTGCAGGAATCG-1</td>\n",
       "      <td>BeamCella</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        NAME          Cluster\n",
       "1  H1TMS1_AAACCTGAGCGTTCCG-1       Macrophage\n",
       "2  H1TMS1_AAACCTGAGGTAGCTG-1  SchwannCell-nmy\n",
       "3  H1TMS1_AAACCTGAGTTGTAGA-1         Pericyte\n",
       "4  H1TMS1_AAACCTGCAGCTGTAT-1  SchwannCell-nmy\n",
       "5  H1TMS1_AAACCTGCAGGAATCG-1        BeamCella"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first annotation rows to check the NAME / Cluster layout.\n",
    "meta.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27f5165f",
   "metadata": {},
   "source": [
    "### Pig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "42aef4e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pig count matrix and flip it so that cells become observations (rows) and genes become variables.\n",
    "pig_ad = sc.read(pig_path).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "dccef17b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the annotations, re-ordered to follow the cell order of the pig AnnData (NAME holds the cell identifiers).\n",
    "meta_by_name = meta.set_index(\"NAME\")\n",
    "pig_ad.obs = meta_by_name.loc[pig_ad.obs_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "0cea9431",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 1., 1., ..., 0., 0., 0.],\n",
       "       [0., 2., 0., ..., 0., 0., 0.],\n",
       "       [0., 4., 0., ..., 0., 0., 0.],\n",
       "       ...,\n",
       "       [0., 2., 0., ..., 0., 0., 0.],\n",
       "       [0., 1., 0., ..., 0., 0., 0.],\n",
       "       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the values of the pig expression matrix after loading.\n",
    "pig_ad.X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "8896381f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Duplicate the Cluster labels under the generic `cell_type` column name.\n",
    "pig_ad.obs = pig_ad.obs.assign(cell_type=pig_ad.obs[\"Cluster\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "c7de5d8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the annotated pig AnnData next to its source CSV, swapping only the file extension.\n",
    "# (A plain str.replace(\"csv\", \"h5ad\") would also rewrite any other \"csv\" occurring in the path.)\n",
    "pig_ad.write(Path(pig_path).with_suffix(\".h5ad\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0aa471e8",
   "metadata": {},
   "source": [
    "### Mouse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "2bb022d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the mouse count matrix and flip it so that cells become observations (rows) and genes become variables.\n",
    "mouse_ad = sc.read(mouse_path).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "c575fd00",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the annotations, re-ordered to follow the cell order of the mouse AnnData (NAME holds the cell identifiers).\n",
    "meta_by_name = meta.set_index(\"NAME\")\n",
    "mouse_ad.obs = meta_by_name.loc[mouse_ad.obs_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "c4e2ab2d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       ...,\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the values of the mouse expression matrix after loading.\n",
    "mouse_ad.X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "1f5697ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Duplicate the Cluster labels under the generic `cell_type` column name.\n",
    "mouse_ad.obs = mouse_ad.obs.assign(cell_type=mouse_ad.obs[\"Cluster\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c607da90",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the annotated mouse AnnData next to its source CSV, swapping only the file extension.\n",
    "# (A plain str.replace(\"csv\", \"h5ad\") would also rewrite any other \"csv\" occurring in the path.)\n",
    "mouse_ad.write(Path(mouse_path).with_suffix(\".h5ad\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2f649ea2",
   "metadata": {},
   "source": [
    "### Human"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "43269107",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the human count matrix and flip it so that cells become observations (rows) and genes become variables.\n",
    "human_ad = sc.read(human_path).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "b9b58358",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the annotations, re-ordered to follow the cell order of the human AnnData (NAME holds the cell identifiers).\n",
    "meta_by_name = meta.set_index(\"NAME\")\n",
    "human_ad.obs = meta_by_name.loc[human_ad.obs_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "4778ee92",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 1., 0.],\n",
       "       ...,\n",
       "       [0., 0., 0., ..., 1., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 4., 0.]], dtype=float32)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the values of the human expression matrix after loading.\n",
    "human_ad.X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "9b97f7b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Duplicate the Cluster labels under the generic `cell_type` column name.\n",
    "human_ad.obs = human_ad.obs.assign(cell_type=human_ad.obs[\"Cluster\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "64ec99d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the annotated human AnnData next to its source CSV, swapping only the file extension.\n",
    "# (A plain str.replace(\"csv\", \"h5ad\") would also rewrite any other \"csv\" occurring in the path.)\n",
    "human_ad.write(Path(human_path).with_suffix(\".h5ad\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59723d57",
   "metadata": {},
   "source": [
    "### MacaM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "e59e3a7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the MacaM count matrix and flip it so that cells become observations (rows) and genes become variables.\n",
    "macaM_ad = sc.read(macaM_path).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "c25a3a17",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the annotations, re-ordered to follow the cell order of the MacaM AnnData (NAME holds the cell identifiers).\n",
    "meta_by_name = meta.set_index(\"NAME\")\n",
    "macaM_ad.obs = meta_by_name.loc[macaM_ad.obs_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "1f697c19",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       ...,\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the values of the MacaM expression matrix after loading.\n",
    "macaM_ad.X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "c2651fb6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Duplicate the Cluster labels under the generic `cell_type` column name.\n",
    "macaM_ad.obs = macaM_ad.obs.assign(cell_type=macaM_ad.obs[\"Cluster\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "3228c121",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the annotated MacaM AnnData next to its source CSV, swapping only the file extension.\n",
    "# (A plain str.replace(\"csv\", \"h5ad\") would also rewrite any other \"csv\" occurring in the path.)\n",
    "macaM_ad.write(Path(macaM_path).with_suffix(\".h5ad\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ab43517",
   "metadata": {},
   "source": [
    "### MacaF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "fa2156d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the MacaF count matrix and flip it so that cells become observations (rows) and genes become variables.\n",
    "macaF_ad = sc.read(macaF_path).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "7f566019",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the annotations, re-ordered to follow the cell order of the MacaF AnnData (NAME holds the cell identifiers).\n",
    "meta_by_name = meta.set_index(\"NAME\")\n",
    "macaF_ad.obs = meta_by_name.loc[macaF_ad.obs_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "bda65bd0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       ...,\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the values of the MacaF expression matrix after loading.\n",
    "macaF_ad.X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "f403c663",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Duplicate the Cluster labels under the generic `cell_type` column name.\n",
    "macaF_ad.obs = macaF_ad.obs.assign(cell_type=macaF_ad.obs[\"Cluster\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "6f1889b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the annotated MacaF AnnData next to its source CSV, swapping only the file extension.\n",
    "# (A plain str.replace(\"csv\", \"h5ad\") would also rewrite any other \"csv\" occurring in the path.)\n",
    "macaF_ad.write(Path(macaF_path).with_suffix(\".h5ad\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "3280d803",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Cluster</th>\n",
       "      <th>cell_type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>MacaCWLS1_AAACCTGAGCTAACAA-1</th>\n",
       "      <td>5_Schwann cell</td>\n",
       "      <td>5_Schwann cell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaCWLS1_AAACCTGAGTGATCGG-1</th>\n",
       "      <td>2_BeamA</td>\n",
       "      <td>2_BeamA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaCWLS1_AAACCTGCACCAGGCT-1</th>\n",
       "      <td>2_BeamA</td>\n",
       "      <td>2_BeamA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaCWLS1_AAACCTGCATGGGAAC-1</th>\n",
       "      <td>15_Beam X</td>\n",
       "      <td>15_Beam X</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaCWLS1_AAACCTGTCGGAAACG-1</th>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaTMRimLS1_TTTGTTGGTGAACCGA-1</th>\n",
       "      <td>17_Corneal epithelium</td>\n",
       "      <td>17_Corneal epithelium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaTMRimLS1_TTTGTTGTCAACCTTT-1</th>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaTMRimLS1_TTTGTTGTCACGGGCT-1</th>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaTMRimLS1_TTTGTTGTCGAGTTGT-1</th>\n",
       "      <td>3_Macrophage</td>\n",
       "      <td>3_Macrophage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MacaTMRimLS1_TTTGTTGTCGCAGTGC-1</th>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "      <td>1_Corneal epithelium</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9155 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               Cluster              cell_type\n",
       "MacaCWLS1_AAACCTGAGCTAACAA-1            5_Schwann cell         5_Schwann cell\n",
       "MacaCWLS1_AAACCTGAGTGATCGG-1                   2_BeamA                2_BeamA\n",
       "MacaCWLS1_AAACCTGCACCAGGCT-1                   2_BeamA                2_BeamA\n",
       "MacaCWLS1_AAACCTGCATGGGAAC-1                 15_Beam X              15_Beam X\n",
       "MacaCWLS1_AAACCTGTCGGAAACG-1      1_Corneal epithelium   1_Corneal epithelium\n",
       "...                                                ...                    ...\n",
       "MacaTMRimLS1_TTTGTTGGTGAACCGA-1  17_Corneal epithelium  17_Corneal epithelium\n",
       "MacaTMRimLS1_TTTGTTGTCAACCTTT-1   1_Corneal epithelium   1_Corneal epithelium\n",
       "MacaTMRimLS1_TTTGTTGTCACGGGCT-1   1_Corneal epithelium   1_Corneal epithelium\n",
       "MacaTMRimLS1_TTTGTTGTCGAGTTGT-1           3_Macrophage           3_Macrophage\n",
       "MacaTMRimLS1_TTTGTTGTCGCAGTGC-1   1_Corneal epithelium   1_Corneal epithelium\n",
       "\n",
       "[9155 rows x 2 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the per-cell annotations now attached to the MacaF AnnData (Cluster plus the copied cell_type column).\n",
    "macaF_ad.obs"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "253f0589",
   "metadata": {},
   "source": [
    "### Double Check All"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "fa1acbca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 6709 × 25880\n",
       "    obs: 'Cluster', 'cell_type'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 5067 × 31053\n",
       "    obs: 'Cluster', 'cell_type'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 24023 × 33660\n",
       "    obs: 'Cluster', 'cell_type'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 5158 × 32386\n",
       "    obs: 'Cluster', 'cell_type'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 9155 × 36162\n",
       "    obs: 'Cluster', 'cell_type'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the summary (dimensions and obs columns) of every converted AnnData, in the same order as before.\n",
    "for species_ad in (pig_ad, mouse_ad, human_ad, macaM_ad, macaF_ad):\n",
    "    display(species_ad)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
